Required packages
library(utils)
library(dplyr)
library(ggplot2)
library(maps)
library(stringr)
library(readr)
library(tidyverse)
library(readxl)
library(plotly)
library(MASS)
library(kableExtra)
library(broom)
library(stargazer)
Cleaning Data
# Download the ECDC daily COVID-19 case distribution table. Empty strings are
# read as NA. read.csv() comes from utils; the pipeline below relies on dplyr
# (%>%, rename, mutate, lag, relocate), stringr and tidyr (drop_na).
data <- read.csv(
  "https://opendata.ecdc.europa.eu/covid19/casedistribution/csv",
  na.strings = "",
  fileEncoding = "UTF-8-BOM"
)

# Tidy the raw table: friendlier column names, real Date values, readable
# region names, and per-region differences between consecutive rows.
covid.data <- data %>%
  rename(Date = "dateRep", Region = "countriesAndTerritories") %>%
  mutate(
    # %Y (four-digit year) instead of %y: with %y a value such as
    # "31/12/2019" is parsed as 2020-12-31 because only "20" is consumed.
    # Assumes dateRep is published as dd/mm/yyyy -- TODO confirm on the feed.
    Date = as.Date(Date, format = "%d/%m/%Y"),
    Region = str_replace_all(Region, "_", " ")
  ) %>%
  group_by(Region) %>%
  # NOTE(review): lag() shifts by row position, so the sign and direction of
  # these differences depend on the row order of the download within each
  # region -- confirm the rows are in the intended chronological order.
  mutate(
    cases_Diff = lag(cases) - cases,
    deaths_Diff = lag(deaths) - deaths
  ) %>%
  # Drop the grouping so later steps (e.g. recoding Region) see a plain table.
  ungroup() %>%
  relocate(cases_Diff, .after = cases) %>%
  relocate(deaths_Diff, .after = deaths) %>%
  # Rows without a country/territory code cannot be mapped or joined later.
  drop_na(countryterritoryCode)
Data Summary
# Country-level summary tables. Every table is keyed by the country code and
# combined with joins, so the results do not depend on the row order or row
# count of the individual tables.
country.rows <- ungroup(covid.data)

#population dataframe
# One row per distinct (Region, population, continent, code); incomplete rows
# are dropped.
pop.data <- country.rows %>%
  transmute(
    Region,
    Population2019 = popData2019,
    Continents = continentExp,
    Codes = countryterritoryCode
  ) %>%
  distinct() %>%
  drop_na() %>%
  as.data.frame()

# Totals per country. sum() propagates NA, so a country with any missing
# daily value gets an NA total and is removed by na.omit() below.
country.totals <- country.rows %>%
  group_by(Region, Codes = countryterritoryCode) %>%
  summarise(cases = sum(cases), deaths = sum(deaths), .groups = "drop") %>%
  as.data.frame()

#cases dataframe
case.data <- na.omit(country.totals[, c("Region", "cases", "Codes")])

#death dataframe
death.data <- na.omit(country.totals[, c("Region", "deaths", "Codes")])

#Death/Case dataframe
# Matched on Region and Codes rather than by row position.
death_case.data <- case.data %>%
  inner_join(death.data, by = c("Region", "Codes")) %>%
  transmute(Region, `Deaths/Case Ratio` = deaths / cases, Codes)

#death/pop *100
# Deaths as a percentage of the 2019 population, matched on the country code.
death_pop.data <- pop.data %>%
  inner_join(death.data[, c("Codes", "deaths")], by = "Codes") %>%
  transmute(Region, Values = deaths / Population2019 * 100, Codes)
#Land Area
# Read the land-area workbook. The third data row holds the real column
# headers, so it is promoted to the names and the first three rows are removed.
Land_Area <- read_excel("Land Area.xls")
names(Land_Area) <- Land_Area[3, ]
Land_Area <- Land_Area[-(1:3), ]
# Keep the first two columns plus columns 62 and 63
# (presumably the `2017` and `2018` columns used below -- verify against the file).
Land_Area <- Land_Area[, c(1, 2, 62, 63)]

#Complete 2018
# Where 2018 is missing, fall back to the 2017 value (vectorised fill).
missing.2018 <- is.na(Land_Area$`2018`)
Land_Area$`2018`[missing.2018] <- Land_Area$`2017`[missing.2018]

# Drop incomplete rows, then remove the third column and call the last one Area.
Land_Area <- na.omit(Land_Area)[, -3]
names(Land_Area)[3] <- "Area"

# Attach the area to the population table by country code, keep complete
# rows only, and drop the fifth column (Land_Area's first column,
# presumably the country name -- verify).
combine <- full_join(pop.data, Land_Area, by = c("Codes" = "Country Code"))
combine <- na.omit(combine)[, -5]
pop.square <- combine
# Area is still text at this point (as.numeric is needed before dividing).
pop.square$Area <- as.numeric(pop.square$Area)
Plotting data
#Population Distribution
# World choropleth of log2(2019 population), located by country code and
# labelled with the region name.
pop.fig <- plot_ly(
  pop.data,
  type = "choropleth",
  locations = pop.data$Codes,
  z = log2(pop.data$Population2019),
  text = pop.data$Region,
  colorscale = "Blues",
  reversescale = TRUE
)
pop.fig <- layout(pop.fig, title = "The Logarithm of World Population in 2019")
pop.fig <- colorbar(pop.fig, title = "Population Rates", limits = c(15, 31))
pop.fig
#Population Density Distribution
# World choropleth of log(population / area), located by country code and
# labelled with the region name.
pop.square.fig <- plot_ly(
  pop.square,
  type = "choropleth",
  locations = pop.square$Codes,
  z = log(pop.square$Population2019 / pop.square$Area),
  text = pop.square$Region,
  colorscale = "Blues",
  reversescale = TRUE
)
pop.square.fig <- layout(
  pop.square.fig,
  title = "The Logarithm of World Population Density in 2019"
)
pop.square.fig <- colorbar(
  pop.square.fig,
  title = "Density Rates",
  limits = c(-2, 7)
)
pop.square.fig
#Case Distribution
# World choropleth of log2(total cases), located by country code and
# labelled with the region name.
case.fig <- plot_ly(
  case.data,
  type = "choropleth",
  locations = case.data$Codes,
  z = log2(case.data$cases),
  text = case.data$Region,
  colorscale = "Reds",
  reversescale = FALSE
)
case.fig <- layout(case.fig, title = "The Logarithm of World Covid-19 Cases Number")
case.fig <- colorbar(case.fig, title = "Cases Number", limits = c(4, 24))
case.fig
#Death Distribution
# World choropleth of log2(total deaths + 1); the +1 keeps countries with
# zero deaths finite on the log scale.
death.fig <- plot_ly(
  death.data,
  type = "choropleth",
  locations = death.data$Codes,
  z = log2(death.data$deaths + 1),
  # Labels come from the same table as locations and z, so each label is
  # guaranteed to belong to the row it annotates.
  text = death.data$Region,
  colorscale = "Reds",
  reversescale = FALSE
) %>%
  layout(title = "The Logarithm of World Covid-19 Deaths Number") %>%
  colorbar(title = "Deaths Number")
death.fig
#Death/Case Distribution
# World choropleth of the deaths-to-cases ratio, located by country code and
# labelled with the region name.
death_case.fig <- plot_ly(
  death_case.data,
  type = "choropleth",
  locations = death_case.data$Codes,
  z = death_case.data$`Deaths/Case Ratio`,
  text = death_case.data$Region,
  colorscale = "Reds",
  reversescale = FALSE
)
death_case.fig <- layout(
  death_case.fig,
  title = "The Ratio of World Covid-19 Deaths to Cases Number"
)
death_case.fig <- colorbar(
  death_case.fig,
  title = "Ratio Number",
  limits = c(0, 0.1)
)
death_case.fig
#Death/pop*100 Distribution
# World choropleth of deaths as a percentage of population (the Values
# column), located by country code and labelled with the region name.
death_pop.fig <- plot_ly(
  death_pop.data,
  type = "choropleth",
  locations = death_pop.data$Codes,
  z = death_pop.data$`Values`,
  text = death_pop.data$Region,
  colorscale = "Reds",
  reversescale = FALSE
)
death_pop.fig <- layout(
  death_pop.fig,
  title = "The Ratio of World Covid-19 Death/Population*100"
)
death_pop.fig <- colorbar(death_pop.fig, title = "Ratio Number")
death_pop.fig
Modeling Preparation
#Create Density Variable
# Population density per country code: 2019 population divided by land area.
pop.density <- pop.square %>%
  transmute(Codes, Density = Population2019 / Area)

#Create data for time series
# Modelling table. The column order matters because later chunks index by
# position: cases_Diff must be column 2, popData2019 column 6, Density
# column 7 and group_id column 8.
covid.time <- covid.data %>%
  # Region is recoded below, which is not allowed while it is a grouping column.
  ungroup() %>%
  # dplyr:: prefix is required: MASS is attached after dplyr and masks select().
  dplyr::select(
    Date, cases_Diff, deaths_Diff, Region,
    Codes = countryterritoryCode, popData2019
  ) %>%
  # Keep only countries that have a density value.
  inner_join(pop.density, by = "Codes") %>%
  mutate(
    Region = as.factor(Region),
    # Integer id per region, 1..groups, following the factor level order.
    group_id = as.integer(Region)
  )
groups <- length(unique(covid.time$Region))
#Lag cases and deaths Difference
# Adds CASE_DIFF-1..14 and DEATH_DIFF-1..14: cases_Diff / deaths_Diff shifted
# by k rows within each region. ave() writes every lagged value back to the
# row it was computed for, so the result is correct even when the regions are
# not stored in contiguous blocks ordered by group_id.
# NOTE(review): the shift is by row position, so whether a "lag" refers to an
# earlier or a later day depends on the row order within each region --
# confirm against the ordering of the downloaded data.
lag_within_group <- function(values, group, k) {
  ave(values, group, FUN = function(v) dplyr::lag(v, k))
}

# Plain data.frame so the column appends below behave the same for tibbles.
covid.time <- as.data.frame(covid.time)

for (k in seq_len(14)) {
  covid.time[[paste("CASE_DIFF", k, sep = "-")]] <-
    lag_within_group(covid.time$cases_Diff, covid.time$group_id, k)
}

for (k in seq_len(14)) {
  covid.time[[paste("DEATH_DIFF", k, sep = "-")]] <-
    lag_within_group(covid.time$deaths_Diff, covid.time$group_id, k)
}
Modeling
#Cases on Cases
# Model 1: cases_Diff (column 2) explained by its own 14 lagged columns
# CASE_DIFF-1..14 (columns 9:22 of covid.time).
data1 = covid.time[,c(2,9:22)]
# Upper scope for the search: all 14 lags. Rows with any NA are dropped.
fit1.m1 <- lm(cases_Diff~., data=na.omit(data1))
# Lower scope and starting point: intercept only.
fit2.m1 <- lm(cases_Diff~1, data=na.omit(data1))
# Stepwise AIC selection in both directions between the two scopes.
step.m1 = stepAIC(fit2.m1,direction="both",scope=list(upper=fit1.m1,lower=fit2.m1),trace = FALSE)
summary(step.m1)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-13` + `CASE_DIFF-6` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5`, data = na.omit(data1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -52123 -23 -14 -3 42077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.151585 3.775670 3.748 0.000178 ***
## `CASE_DIFF-7` 0.488981 0.004653 105.094 < 2e-16 ***
## `CASE_DIFF-14` 0.119312 0.003501 34.080 < 2e-16 ***
## `CASE_DIFF-1` -0.543055 0.004280 -126.890 < 2e-16 ***
## `CASE_DIFF-8` 0.363409 0.004576 79.416 < 2e-16 ***
## `CASE_DIFF-2` -0.422698 0.004720 -89.547 < 2e-16 ***
## `CASE_DIFF-9` 0.222387 0.004413 50.397 < 2e-16 ***
## `CASE_DIFF-3` -0.277095 0.004681 -59.200 < 2e-16 ***
## `CASE_DIFF-12` -0.105906 0.003722 -28.450 < 2e-16 ***
## `CASE_DIFF-13` -0.103427 0.003910 -26.450 < 2e-16 ***
## `CASE_DIFF-6` 0.068494 0.004885 14.020 < 2e-16 ***
## `CASE_DIFF-4` -0.131883 0.004187 -31.500 < 2e-16 ***
## `CASE_DIFF-10` 0.116314 0.003875 30.016 < 2e-16 ***
## `CASE_DIFF-5` -0.066490 0.004783 -13.901 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 863.4 on 52547 degrees of freedom
## Multiple R-squared: 0.5778, Adjusted R-squared: 0.5777
## F-statistic: 5531 on 13 and 52547 DF, p-value: < 2.2e-16
#Cases & Deaths on Cases
# Model 2: cases_Diff explained by the 14 case lags and the 14 death lags
# (columns 9:36 of covid.time).
data2 = covid.time[,c(2,9:36)]
# Upper scope for the search: all 28 lags. Rows with any NA are dropped.
fit1.m2 <- lm(cases_Diff~., data=na.omit(data2))
# Lower scope and starting point: intercept only.
fit2.m2 <- lm(cases_Diff~1, data=na.omit(data2))
# Stepwise AIC selection in both directions between the two scopes.
step.m2 = stepAIC(fit2.m2,direction="both",scope=list(upper=fit1.m2,lower=fit2.m2),trace = FALSE)
summary(step.m2)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-11` + `CASE_DIFF-13` + `CASE_DIFF-6` +
## `CASE_DIFF-4` + `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` +
## `DEATH_DIFF-7` + `DEATH_DIFF-6` + `DEATH_DIFF-5` + `DEATH_DIFF-9` +
## `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` + `DEATH_DIFF-13` +
## `DEATH_DIFF-14` + `DEATH_DIFF-2` + `DEATH_DIFF-1`, data = na.omit(data2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -52537 -22 -13 -2 40650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.198553 3.758951 3.511 0.000446 ***
## `CASE_DIFF-7` 0.464047 0.004811 96.449 < 2e-16 ***
## `CASE_DIFF-14` 0.113585 0.003660 31.035 < 2e-16 ***
## `CASE_DIFF-1` -0.550542 0.004326 -127.254 < 2e-16 ***
## `CASE_DIFF-8` 0.338636 0.004887 69.288 < 2e-16 ***
## `CASE_DIFF-2` -0.430428 0.004897 -87.889 < 2e-16 ***
## `CASE_DIFF-9` 0.206549 0.005083 40.636 < 2e-16 ***
## `CASE_DIFF-3` -0.279987 0.005134 -54.536 < 2e-16 ***
## `CASE_DIFF-12` -0.110360 0.004878 -22.622 < 2e-16 ***
## `CASE_DIFF-11` -0.009577 0.005056 -1.894 0.058225 .
## `CASE_DIFF-13` -0.110529 0.004402 -25.109 < 2e-16 ***
## `CASE_DIFF-6` 0.052636 0.005030 10.465 < 2e-16 ***
## `CASE_DIFF-4` -0.131576 0.005221 -25.200 < 2e-16 ***
## `CASE_DIFF-10` 0.106421 0.005124 20.769 < 2e-16 ***
## `CASE_DIFF-5` -0.073953 0.005208 -14.200 < 2e-16 ***
## `DEATH_DIFF-8` 2.087797 0.109920 18.994 < 2e-16 ***
## `DEATH_DIFF-7` 1.903887 0.101099 18.832 < 2e-16 ***
## `DEATH_DIFF-6` 1.261693 0.089718 14.063 < 2e-16 ***
## `DEATH_DIFF-5` 0.656150 0.070279 9.336 < 2e-16 ***
## `DEATH_DIFF-9` 1.566417 0.113954 13.746 < 2e-16 ***
## `DEATH_DIFF-10` 1.405313 0.113072 12.428 < 2e-16 ***
## `DEATH_DIFF-11` 1.423336 0.108800 13.082 < 2e-16 ***
## `DEATH_DIFF-12` 1.063922 0.100610 10.575 < 2e-16 ***
## `DEATH_DIFF-13` 0.898552 0.089345 10.057 < 2e-16 ***
## `DEATH_DIFF-14` 0.613926 0.069571 8.824 < 2e-16 ***
## `DEATH_DIFF-2` 0.273820 0.062905 4.353 1.35e-05 ***
## `DEATH_DIFF-1` 0.192886 0.063258 3.049 0.002296 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 859.2 on 52534 degrees of freedom
## Multiple R-squared: 0.582, Adjusted R-squared: 0.5817
## F-statistic: 2813 on 26 and 52534 DF, p-value: < 2.2e-16
#Compare models
# F-test of the selected case-lag model against the selected case + death
# lag model.
anova(step.m1,step.m2)
## Analysis of Variance Table
##
## Model 1: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-13` + `CASE_DIFF-6` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5`
## Model 2: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-11` + `CASE_DIFF-13` + `CASE_DIFF-6` +
## `CASE_DIFF-4` + `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` +
## `DEATH_DIFF-7` + `DEATH_DIFF-6` + `DEATH_DIFF-5` + `DEATH_DIFF-9` +
## `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` + `DEATH_DIFF-13` +
## `DEATH_DIFF-14` + `DEATH_DIFF-2` + `DEATH_DIFF-1`
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 52547 3.9167e+10
## 2 52534 3.8779e+10 13 388052910 40.438 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
| Model Name | Res.Df | RSS | Df | Sum of Sq | F | Pr(>F) |
|---|---|---|---|---|---|---|
| 1 | 52547 | 3.9167e+10 | | | | |
| 2 | 52534 | 3.8779e+10 | 13 | 388052910 | 40.438 | < 2.2e-16 *** |
#Cases, Deaths & Population on Cases
# Model 3: Model 2's candidates plus popData2019 (column 6 of covid.time).
data3 = covid.time[,c(2,6,9:36)]
# Upper scope for the search: population and all 28 lags. Rows with any NA
# are dropped.
fit1.m3 <- lm(cases_Diff~., data=na.omit(data3))
# Lower scope and starting point: intercept only.
fit2.m3 <- lm(cases_Diff~1, data=na.omit(data3))
# Stepwise AIC selection in both directions between the two scopes.
step.m3 = stepAIC(fit2.m3,direction="both",scope=list(upper=fit1.m3,lower=fit2.m3),trace = FALSE)
summary(step.m3)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-11` + `CASE_DIFF-13` + `CASE_DIFF-6` +
## `CASE_DIFF-4` + `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` +
## `DEATH_DIFF-7` + `DEATH_DIFF-6` + `DEATH_DIFF-5` + `DEATH_DIFF-9` +
## popData2019 + `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` +
## `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-2` + `DEATH_DIFF-1`,
## data = na.omit(data3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -52549 -22 -10 0 40623
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.294e+00 3.890e+00 2.389 0.01688 *
## `CASE_DIFF-7` 4.630e-01 4.818e-03 96.112 < 2e-16 ***
## `CASE_DIFF-14` 1.133e-01 3.660e-03 30.952 < 2e-16 ***
## `CASE_DIFF-1` -5.509e-01 4.326e-03 -127.322 < 2e-16 ***
## `CASE_DIFF-8` 3.376e-01 4.893e-03 68.997 < 2e-16 ***
## `CASE_DIFF-2` -4.308e-01 4.898e-03 -87.965 < 2e-16 ***
## `CASE_DIFF-9` 2.056e-01 5.088e-03 40.418 < 2e-16 ***
## `CASE_DIFF-3` -2.805e-01 5.135e-03 -54.629 < 2e-16 ***
## `CASE_DIFF-12` -1.109e-01 4.880e-03 -22.734 < 2e-16 ***
## `CASE_DIFF-11` -1.027e-02 5.059e-03 -2.030 0.04235 *
## `CASE_DIFF-13` -1.110e-01 4.403e-03 -25.202 < 2e-16 ***
## `CASE_DIFF-6` 5.172e-02 5.035e-03 10.273 < 2e-16 ***
## `CASE_DIFF-4` -1.323e-01 5.223e-03 -25.319 < 2e-16 ***
## `CASE_DIFF-10` 1.056e-01 5.128e-03 20.597 < 2e-16 ***
## `CASE_DIFF-5` -7.473e-02 5.211e-03 -14.341 < 2e-16 ***
## `DEATH_DIFF-8` 2.086e+00 1.099e-01 18.984 < 2e-16 ***
## `DEATH_DIFF-7` 1.902e+00 1.011e-01 18.817 < 2e-16 ***
## `DEATH_DIFF-6` 1.260e+00 8.971e-02 14.048 < 2e-16 ***
## `DEATH_DIFF-5` 6.551e-01 7.027e-02 9.323 < 2e-16 ***
## `DEATH_DIFF-9` 1.565e+00 1.139e-01 13.739 < 2e-16 ***
## popData2019 9.273e-08 2.380e-08 3.897 9.75e-05 ***
## `DEATH_DIFF-10` 1.404e+00 1.131e-01 12.422 < 2e-16 ***
## `DEATH_DIFF-11` 1.423e+00 1.088e-01 13.077 < 2e-16 ***
## `DEATH_DIFF-12` 1.064e+00 1.006e-01 10.572 < 2e-16 ***
## `DEATH_DIFF-13` 8.986e-01 8.933e-02 10.059 < 2e-16 ***
## `DEATH_DIFF-14` 6.141e-01 6.956e-02 8.828 < 2e-16 ***
## `DEATH_DIFF-2` 2.736e-01 6.290e-02 4.349 1.37e-05 ***
## `DEATH_DIFF-1` 1.924e-01 6.325e-02 3.043 0.00235 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 859.1 on 52533 degrees of freedom
## Multiple R-squared: 0.5821, Adjusted R-squared: 0.5819
## F-statistic: 2710 on 27 and 52533 DF, p-value: < 2.2e-16
#Compare models
# F-test of Model 2 against Model 3, i.e. the contribution of popData2019.
anova(step.m2,step.m3)
## Analysis of Variance Table
##
## Model 1: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-11` + `CASE_DIFF-13` + `CASE_DIFF-6` +
## `CASE_DIFF-4` + `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` +
## `DEATH_DIFF-7` + `DEATH_DIFF-6` + `DEATH_DIFF-5` + `DEATH_DIFF-9` +
## `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` + `DEATH_DIFF-13` +
## `DEATH_DIFF-14` + `DEATH_DIFF-2` + `DEATH_DIFF-1`
## Model 2: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-11` + `CASE_DIFF-13` + `CASE_DIFF-6` +
## `CASE_DIFF-4` + `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` +
## `DEATH_DIFF-7` + `DEATH_DIFF-6` + `DEATH_DIFF-5` + `DEATH_DIFF-9` +
## popData2019 + `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` +
## `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-2` + `DEATH_DIFF-1`
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 52534 3.8779e+10
## 2 52533 3.8768e+10 1 11207507 15.187 9.75e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
| Model Name | Res.Df | RSS | Df | Sum of Sq | F | Pr(>F) |
|---|---|---|---|---|---|---|
| 2 | 52534 | 3.8779e+10 | | | | |
| 3 | 52533 | 3.8768e+10 | 1 | 11207507 | 15.187 | 9.75e-05 *** |
#Cases, Deaths & Population Density on Cases
# Model 4: Model 2's candidates plus Density (column 7 of covid.time, added
# by the join with pop.density) instead of popData2019.
data4 = covid.time[,c(2,7,9:36)]
# Upper scope for the search: density and all 28 lags. Rows with any NA are
# dropped.
fit1.m4 <- lm(cases_Diff~., data=na.omit(data4))
# Lower scope and starting point: intercept only.
fit2.m4 <- lm(cases_Diff~1, data=na.omit(data4))
# Stepwise AIC selection in both directions between the two scopes.
step.m4 = stepAIC(fit2.m4,direction="both",scope=list(upper=fit1.m4,lower=fit2.m4),trace = FALSE)
summary(step.m4)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-11` + `CASE_DIFF-13` + `CASE_DIFF-6` +
## `CASE_DIFF-4` + `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` +
## `DEATH_DIFF-7` + `DEATH_DIFF-6` + `DEATH_DIFF-5` + `DEATH_DIFF-9` +
## `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` + `DEATH_DIFF-13` +
## `DEATH_DIFF-14` + `DEATH_DIFF-2` + `DEATH_DIFF-1`, data = na.omit(data4))
##
## Residuals:
## Min 1Q Median 3Q Max
## -52537 -22 -13 -2 40650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.198553 3.758951 3.511 0.000446 ***
## `CASE_DIFF-7` 0.464047 0.004811 96.449 < 2e-16 ***
## `CASE_DIFF-14` 0.113585 0.003660 31.035 < 2e-16 ***
## `CASE_DIFF-1` -0.550542 0.004326 -127.254 < 2e-16 ***
## `CASE_DIFF-8` 0.338636 0.004887 69.288 < 2e-16 ***
## `CASE_DIFF-2` -0.430428 0.004897 -87.889 < 2e-16 ***
## `CASE_DIFF-9` 0.206549 0.005083 40.636 < 2e-16 ***
## `CASE_DIFF-3` -0.279987 0.005134 -54.536 < 2e-16 ***
## `CASE_DIFF-12` -0.110360 0.004878 -22.622 < 2e-16 ***
## `CASE_DIFF-11` -0.009577 0.005056 -1.894 0.058225 .
## `CASE_DIFF-13` -0.110529 0.004402 -25.109 < 2e-16 ***
## `CASE_DIFF-6` 0.052636 0.005030 10.465 < 2e-16 ***
## `CASE_DIFF-4` -0.131576 0.005221 -25.200 < 2e-16 ***
## `CASE_DIFF-10` 0.106421 0.005124 20.769 < 2e-16 ***
## `CASE_DIFF-5` -0.073953 0.005208 -14.200 < 2e-16 ***
## `DEATH_DIFF-8` 2.087797 0.109920 18.994 < 2e-16 ***
## `DEATH_DIFF-7` 1.903887 0.101099 18.832 < 2e-16 ***
## `DEATH_DIFF-6` 1.261693 0.089718 14.063 < 2e-16 ***
## `DEATH_DIFF-5` 0.656150 0.070279 9.336 < 2e-16 ***
## `DEATH_DIFF-9` 1.566417 0.113954 13.746 < 2e-16 ***
## `DEATH_DIFF-10` 1.405313 0.113072 12.428 < 2e-16 ***
## `DEATH_DIFF-11` 1.423336 0.108800 13.082 < 2e-16 ***
## `DEATH_DIFF-12` 1.063922 0.100610 10.575 < 2e-16 ***
## `DEATH_DIFF-13` 0.898552 0.089345 10.057 < 2e-16 ***
## `DEATH_DIFF-14` 0.613926 0.069571 8.824 < 2e-16 ***
## `DEATH_DIFF-2` 0.273820 0.062905 4.353 1.35e-05 ***
## `DEATH_DIFF-1` 0.192886 0.063258 3.049 0.002296 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 859.2 on 52534 degrees of freedom
## Multiple R-squared: 0.582, Adjusted R-squared: 0.5817
## F-statistic: 2813 on 26 and 52534 DF, p-value: < 2.2e-16
#Compare models
# F-test of Model 3 against Model 4. Model 4 is listed second, so Df and
# Sum of Sq are reported relative to Model 3.
anova(step.m3,step.m4)
## Analysis of Variance Table
##
## Model 1: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-11` + `CASE_DIFF-13` + `CASE_DIFF-6` +
## `CASE_DIFF-4` + `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` +
## `DEATH_DIFF-7` + `DEATH_DIFF-6` + `DEATH_DIFF-5` + `DEATH_DIFF-9` +
## popData2019 + `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` +
## `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-2` + `DEATH_DIFF-1`
## Model 2: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-11` + `CASE_DIFF-13` + `CASE_DIFF-6` +
## `CASE_DIFF-4` + `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` +
## `DEATH_DIFF-7` + `DEATH_DIFF-6` + `DEATH_DIFF-5` + `DEATH_DIFF-9` +
## `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` + `DEATH_DIFF-13` +
## `DEATH_DIFF-14` + `DEATH_DIFF-2` + `DEATH_DIFF-1`
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 52533 3.8768e+10
## 2 52534 3.8779e+10 -1 -11207507 15.187 9.75e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
| Model Name | Res.Df | RSS | Df | Sum of Sq | F | Pr(>F) |
|---|---|---|---|---|---|---|
| 3 | 52533 | 3.8768e+10 | | | | |
| 4 | 52534 | 3.8779e+10 | -1 | -11207507 | 15.187 | 9.75e-05 *** |
Final Model
# Model 3 (case lags, death lags and popData2019) is reported as the final
# model.
summary(step.m3)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-3` +
## `CASE_DIFF-12` + `CASE_DIFF-11` + `CASE_DIFF-13` + `CASE_DIFF-6` +
## `CASE_DIFF-4` + `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` +
## `DEATH_DIFF-7` + `DEATH_DIFF-6` + `DEATH_DIFF-5` + `DEATH_DIFF-9` +
## popData2019 + `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` +
## `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-2` + `DEATH_DIFF-1`,
## data = na.omit(data3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -52549 -22 -10 0 40623
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.294e+00 3.890e+00 2.389 0.01688 *
## `CASE_DIFF-7` 4.630e-01 4.818e-03 96.112 < 2e-16 ***
## `CASE_DIFF-14` 1.133e-01 3.660e-03 30.952 < 2e-16 ***
## `CASE_DIFF-1` -5.509e-01 4.326e-03 -127.322 < 2e-16 ***
## `CASE_DIFF-8` 3.376e-01 4.893e-03 68.997 < 2e-16 ***
## `CASE_DIFF-2` -4.308e-01 4.898e-03 -87.965 < 2e-16 ***
## `CASE_DIFF-9` 2.056e-01 5.088e-03 40.418 < 2e-16 ***
## `CASE_DIFF-3` -2.805e-01 5.135e-03 -54.629 < 2e-16 ***
## `CASE_DIFF-12` -1.109e-01 4.880e-03 -22.734 < 2e-16 ***
## `CASE_DIFF-11` -1.027e-02 5.059e-03 -2.030 0.04235 *
## `CASE_DIFF-13` -1.110e-01 4.403e-03 -25.202 < 2e-16 ***
## `CASE_DIFF-6` 5.172e-02 5.035e-03 10.273 < 2e-16 ***
## `CASE_DIFF-4` -1.323e-01 5.223e-03 -25.319 < 2e-16 ***
## `CASE_DIFF-10` 1.056e-01 5.128e-03 20.597 < 2e-16 ***
## `CASE_DIFF-5` -7.473e-02 5.211e-03 -14.341 < 2e-16 ***
## `DEATH_DIFF-8` 2.086e+00 1.099e-01 18.984 < 2e-16 ***
## `DEATH_DIFF-7` 1.902e+00 1.011e-01 18.817 < 2e-16 ***
## `DEATH_DIFF-6` 1.260e+00 8.971e-02 14.048 < 2e-16 ***
## `DEATH_DIFF-5` 6.551e-01 7.027e-02 9.323 < 2e-16 ***
## `DEATH_DIFF-9` 1.565e+00 1.139e-01 13.739 < 2e-16 ***
## popData2019 9.273e-08 2.380e-08 3.897 9.75e-05 ***
## `DEATH_DIFF-10` 1.404e+00 1.131e-01 12.422 < 2e-16 ***
## `DEATH_DIFF-11` 1.423e+00 1.088e-01 13.077 < 2e-16 ***
## `DEATH_DIFF-12` 1.064e+00 1.006e-01 10.572 < 2e-16 ***
## `DEATH_DIFF-13` 8.986e-01 8.933e-02 10.059 < 2e-16 ***
## `DEATH_DIFF-14` 6.141e-01 6.956e-02 8.828 < 2e-16 ***
## `DEATH_DIFF-2` 2.736e-01 6.290e-02 4.349 1.37e-05 ***
## `DEATH_DIFF-1` 1.924e-01 6.325e-02 3.043 0.00235 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 859.1 on 52533 degrees of freedom
## Multiple R-squared: 0.5821, Adjusted R-squared: 0.5819
## F-statistic: 2710 on 27 and 52533 DF, p-value: < 2.2e-16
Model Presentation
# Side-by-side text table of the selected Models 1 and 2.
stargazer(
  step.m1, step.m2,
  title = "Results of Model 1 & 2",
  align = TRUE,
  type = "text"
)
##
## Results of Model 1 & 2
## ===============================================================================
## Dependent variable:
## -----------------------------------------------------------
## cases_Diff
## (1) (2)
## -------------------------------------------------------------------------------
## `CASE_DIFF-7` 0.489*** 0.464***
## (0.005) (0.005)
##
## `CASE_DIFF-14` 0.119*** 0.114***
## (0.004) (0.004)
##
## `CASE_DIFF-1` -0.543*** -0.551***
## (0.004) (0.004)
##
## `CASE_DIFF-8` 0.363*** 0.339***
## (0.005) (0.005)
##
## `CASE_DIFF-2` -0.423*** -0.430***
## (0.005) (0.005)
##
## `CASE_DIFF-9` 0.222*** 0.207***
## (0.004) (0.005)
##
## `CASE_DIFF-3` -0.277*** -0.280***
## (0.005) (0.005)
##
## `CASE_DIFF-12` -0.106*** -0.110***
## (0.004) (0.005)
##
## `CASE_DIFF-11` -0.010*
## (0.005)
##
## `CASE_DIFF-13` -0.103*** -0.111***
## (0.004) (0.004)
##
## `CASE_DIFF-6` 0.068*** 0.053***
## (0.005) (0.005)
##
## `CASE_DIFF-4` -0.132*** -0.132***
## (0.004) (0.005)
##
## `CASE_DIFF-10` 0.116*** 0.106***
## (0.004) (0.005)
##
## `CASE_DIFF-5` -0.066*** -0.074***
## (0.005) (0.005)
##
## `DEATH_DIFF-8` 2.088***
## (0.110)
##
## `DEATH_DIFF-7` 1.904***
## (0.101)
##
## `DEATH_DIFF-6` 1.262***
## (0.090)
##
## `DEATH_DIFF-5` 0.656***
## (0.070)
##
## `DEATH_DIFF-9` 1.566***
## (0.114)
##
## `DEATH_DIFF-10` 1.405***
## (0.113)
##
## `DEATH_DIFF-11` 1.423***
## (0.109)
##
## `DEATH_DIFF-12` 1.064***
## (0.101)
##
## `DEATH_DIFF-13` 0.899***
## (0.089)
##
## `DEATH_DIFF-14` 0.614***
## (0.070)
##
## `DEATH_DIFF-2` 0.274***
## (0.063)
##
## `DEATH_DIFF-1` 0.193***
## (0.063)
##
## Constant 14.152*** 13.199***
## (3.776) (3.759)
##
## -------------------------------------------------------------------------------
## Observations 52,561 52,561
## R2 0.578 0.582
## Adjusted R2 0.578 0.582
## Residual Std. Error 863.350 (df = 52547) 859.169 (df = 52534)
## F Statistic 5,531.152*** (df = 13; 52547) 2,812.779*** (df = 26; 52534)
## ===============================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Side-by-side text table of the selected Models 3 and 4.
stargazer(
  step.m3, step.m4,
  title = "Results of Model 3 & 4",
  align = TRUE,
  type = "text"
)
##
## Results of Model 3 & 4
## ===============================================================================
## Dependent variable:
## -----------------------------------------------------------
## cases_Diff
## (1) (2)
## -------------------------------------------------------------------------------
## `CASE_DIFF-7` 0.463*** 0.464***
## (0.005) (0.005)
##
## `CASE_DIFF-14` 0.113*** 0.114***
## (0.004) (0.004)
##
## `CASE_DIFF-1` -0.551*** -0.551***
## (0.004) (0.004)
##
## `CASE_DIFF-8` 0.338*** 0.339***
## (0.005) (0.005)
##
## `CASE_DIFF-2` -0.431*** -0.430***
## (0.005) (0.005)
##
## `CASE_DIFF-9` 0.206*** 0.207***
## (0.005) (0.005)
##
## `CASE_DIFF-3` -0.281*** -0.280***
## (0.005) (0.005)
##
## `CASE_DIFF-12` -0.111*** -0.110***
## (0.005) (0.005)
##
## `CASE_DIFF-11` -0.010** -0.010*
## (0.005) (0.005)
##
## `CASE_DIFF-13` -0.111*** -0.111***
## (0.004) (0.004)
##
## `CASE_DIFF-6` 0.052*** 0.053***
## (0.005) (0.005)
##
## `CASE_DIFF-4` -0.132*** -0.132***
## (0.005) (0.005)
##
## `CASE_DIFF-10` 0.106*** 0.106***
## (0.005) (0.005)
##
## `CASE_DIFF-5` -0.075*** -0.074***
## (0.005) (0.005)
##
## `DEATH_DIFF-8` 2.086*** 2.088***
## (0.110) (0.110)
##
## `DEATH_DIFF-7` 1.902*** 1.904***
## (0.101) (0.101)
##
## `DEATH_DIFF-6` 1.260*** 1.262***
## (0.090) (0.090)
##
## `DEATH_DIFF-5` 0.655*** 0.656***
## (0.070) (0.070)
##
## `DEATH_DIFF-9` 1.565*** 1.566***
## (0.114) (0.114)
##
## popData2019 0.00000***
## (0.00000)
##
## `DEATH_DIFF-10` 1.404*** 1.405***
## (0.113) (0.113)
##
## `DEATH_DIFF-11` 1.423*** 1.423***
## (0.109) (0.109)
##
## `DEATH_DIFF-12` 1.064*** 1.064***
## (0.101) (0.101)
##
## `DEATH_DIFF-13` 0.899*** 0.899***
## (0.089) (0.089)
##
## `DEATH_DIFF-14` 0.614*** 0.614***
## (0.070) (0.070)
##
## `DEATH_DIFF-2` 0.274*** 0.274***
## (0.063) (0.063)
##
## `DEATH_DIFF-1` 0.192*** 0.193***
## (0.063) (0.063)
##
## Constant 9.294** 13.199***
## (3.890) (3.759)
##
## -------------------------------------------------------------------------------
## Observations 52,561 52,561
## R2 0.582 0.582
## Adjusted R2 0.582 0.582
## Residual Std. Error 859.053 (df = 52533) 859.169 (df = 52534)
## F Statistic 2,709.896*** (df = 27; 52533) 2,812.779*** (df = 26; 52534)
## ===============================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Side-by-side text table of the selected Models 1, 2 and 3.
stargazer(
  step.m1, step.m2, step.m3,
  title = "Results of Model 1 & 2 & 3",
  align = TRUE,
  type = "text"
)
##
## Results of Model 1 & 2 & 3
## =============================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------------------------
## cases_Diff
## (1) (2) (3)
## -------------------------------------------------------------------------------------------------------------
## `CASE_DIFF-7` 0.489*** 0.464*** 0.463***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-14` 0.119*** 0.114*** 0.113***
## (0.004) (0.004) (0.004)
##
## `CASE_DIFF-1` -0.543*** -0.551*** -0.551***
## (0.004) (0.004) (0.004)
##
## `CASE_DIFF-8` 0.363*** 0.339*** 0.338***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-2` -0.423*** -0.430*** -0.431***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-9` 0.222*** 0.207*** 0.206***
## (0.004) (0.005) (0.005)
##
## `CASE_DIFF-3` -0.277*** -0.280*** -0.281***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-12` -0.106*** -0.110*** -0.111***
## (0.004) (0.005) (0.005)
##
## `CASE_DIFF-11` -0.010* -0.010**
## (0.005) (0.005)
##
## `CASE_DIFF-13` -0.103*** -0.111*** -0.111***
## (0.004) (0.004) (0.004)
##
## `CASE_DIFF-6` 0.068*** 0.053*** 0.052***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-4` -0.132*** -0.132*** -0.132***
## (0.004) (0.005) (0.005)
##
## `CASE_DIFF-10` 0.116*** 0.106*** 0.106***
## (0.004) (0.005) (0.005)
##
## `CASE_DIFF-5` -0.066*** -0.074*** -0.075***
## (0.005) (0.005) (0.005)
##
## `DEATH_DIFF-8` 2.088*** 2.086***
## (0.110) (0.110)
##
## `DEATH_DIFF-7` 1.904*** 1.902***
## (0.101) (0.101)
##
## `DEATH_DIFF-6` 1.262*** 1.260***
## (0.090) (0.090)
##
## `DEATH_DIFF-5` 0.656*** 0.655***
## (0.070) (0.070)
##
## `DEATH_DIFF-9` 1.566*** 1.565***
## (0.114) (0.114)
##
## popData2019 0.00000***
## (0.00000)
##
## `DEATH_DIFF-10` 1.405*** 1.404***
## (0.113) (0.113)
##
## `DEATH_DIFF-11` 1.423*** 1.423***
## (0.109) (0.109)
##
## `DEATH_DIFF-12` 1.064*** 1.064***
## (0.101) (0.101)
##
## `DEATH_DIFF-13` 0.899*** 0.899***
## (0.089) (0.089)
##
## `DEATH_DIFF-14` 0.614*** 0.614***
## (0.070) (0.070)
##
## `DEATH_DIFF-2` 0.274*** 0.274***
## (0.063) (0.063)
##
## `DEATH_DIFF-1` 0.193*** 0.192***
## (0.063) (0.063)
##
## Constant 14.152*** 13.199*** 9.294**
## (3.776) (3.759) (3.890)
##
## -------------------------------------------------------------------------------------------------------------
## Observations 52,561 52,561 52,561
## R2 0.578 0.582 0.582
## Adjusted R2 0.578 0.582 0.582
## Residual Std. Error 863.350 (df = 52547) 859.169 (df = 52534) 859.053 (df = 52533)
## F Statistic 5,531.152*** (df = 13; 52547) 2,812.779*** (df = 26; 52534) 2,709.896*** (df = 27; 52533)
## =============================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01